#! /usr/bin/python
import os
import math
# Build a word-id training file and its vocabulary from the documents found
# in TrainDir.
#
# Outputs (written to the current working directory):
#   trainFile - one line per document: "<label> <id> <id> ... #"
#   dic       - one line per distinct token: "<token> <id>"
wordsDic = {}  # token -> integer id, assigned from 1 in order of first appearance
TrainDir = "train"
fileList = os.listdir(TrainDir)
value = 0  # highest word id handed out so far

# open() replaces the Python-2-only file() builtin, and the with-blocks make
# sure every handle is closed even when reading a document raises.
with open("trainFile", 'w') as trainFile:
    for fileName in fileList:
        # The class label comes from the file name: 1 = business, 2 = sport,
        # 3 = anything else. A substring test is used instead of
        # find(...) > 0, which missed names that *start* with the keyword
        # (find() returns 0 there, and -1 only when the keyword is absent).
        if "business" in fileName:
            trainFile.write('1')
        elif "sport" in fileName:
            trainFile.write('2')
        else:
            trainFile.write('3')
        trainFile.write(' ')

        with open(os.path.join(TrainDir, fileName), 'r') as infile:
            # Iterate over every line up to the real end of file. The former
            # readline()/strip() loop could not tell a blank line from EOF and
            # silently dropped everything after the first empty line.
            for line in infile:
                sline = line.strip()
                if not sline:
                    # Blank line: nothing to encode, keep reading.
                    continue
                # Tokens are separated by single spaces, as before, so ids
                # assigned to existing input are unchanged.
                for word in sline.split(' '):
                    if word not in wordsDic:
                        value = value + 1
                        wordsDic[word] = value
                    trainFile.write(str(wordsDic[word]))
                    trainFile.write(' ')

        # '#' terminates the id list of each document.
        trainFile.write('#')
        trainFile.write('\n')

# Dump the vocabulary, one "<token> <id>" pair per line.
with open("dic", 'w') as dic:
    for key in wordsDic:
        dic.write(str(key) + ' ' + str(wordsDic[key]))
        dic.write('\n')

